Submitted by: Susan Bataju
For this Lab, the "Tomato leaf disease detection" image dataset by Kaustubh B. was selected from Kaggle[1]. The dataset covers ten different types of diseases for tomato leaves; they are listed below. For each type of disease there are 10000 images in the train folder and 1000 images in the test folder. The task is to carry out multiclass classification. Here is the list of the diseases:
All the images in the dataset are of the same shape and are imported as 256x256x3 numpy arrays. For the first part, all images are reshaped to 128x128x3 arrays, and later to 224x224x3. As the dataset is already split into 10000 train and 1000 test images, I further divide the train set into an actual train set of 9000 and a validation set of 1000. This means the validation set and test set are equal in size. Lastly, all the classes have an equal number of images.
import tensorflow as tf
# BUG FIX: tf.test.gpu_device_name() returns a STRING such as '/device:GPU:0',
# so len(...) counted its characters (13), not the number of GPUs. Count the
# physical GPU devices instead.
print("Num of GPUs available: ", len(tf.config.list_physical_devices('GPU')))
# Num of GPUs available: 1
import os
import cv2
%matplotlib inline
from matplotlib import pyplot as plt
import numpy as np
import os
from sklearn import metrics as mt
import matplotlib.image as mpimg
from keras.models import Sequential
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation, Flatten, Dropout, Dense
from tensorflow.keras.layers import BatchNormalization
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
from sklearn.preprocessing import LabelBinarizer
# import seaborn as sns
from skimage.io import imshow
from sklearn.metrics import plot_confusion_matrix
from tensorflow.keras.utils import plot_model
from skimage.transform import resize
# import pandas as pd
import tensorflow.keras as keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Reshape, Input
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2
from tensorflow.keras.layers import average
from tensorflow.keras.models import Model
from PIL import Image
plt.style.use('ggplot')
def plot_history(history):
    """Plot training/validation accuracy and loss side by side from a Keras History."""
    hist = history.history
    acc, val_acc = hist['accuracy'], hist['val_accuracy']
    loss, val_loss = hist['loss'], hist['val_loss']
    epochs_range = range(len(acc))

    plt.figure(figsize=(8, 8))

    # left panel: accuracy curves
    plt.subplot(1, 2, 1)
    plt.plot(epochs_range, acc, label='Training Accuracy')
    plt.plot(epochs_range, val_acc, label='Validation Accuracy')
    plt.legend(loc='lower right')
    plt.title('Training and Validation Accuracy')

    # right panel: loss curves
    plt.subplot(1, 2, 2)
    plt.plot(epochs_range, loss, label='Training Loss')
    plt.plot(epochs_range, val_loss, label='Validation Loss')
    plt.legend(loc='upper right')
    plt.title('Training and Validation Loss')
    plt.show()
# https://stackoverflow.com/a/29877565/20626850
def plot_confusion_matrix(df_confusion, label, title='Confusion matrix', cmap=plt.cm.winter):
    """Render a confusion matrix as a colored grid with per-cell values.

    NOTE(review): the whole matrix is divided by the total of its FIRST row
    only — this assumes every class has the same number of samples (true for
    this balanced dataset); confirm before reusing elsewhere.
    """
    df_confusion = df_confusion / sum(df_confusion[0])
    plt.matshow(df_confusion, cmap=cmap, fignum=10)  # imshow
    plt.title(title)
    plt.colorbar()
    ticks = np.arange(len(label))
    plt.xticks(ticks, label, rotation=90)
    plt.yticks(ticks, label)
    # annotate every cell with its normalized value
    for (row, col), cell in np.ndenumerate(df_confusion):
        plt.text(col, row, '{:0.2f}'.format(cell), ha='center', va='center', color='black')
    plt.grid()
    plt.ylabel('Actual')
    plt.xlabel('Predicted')
def img_to_np(DIR, flatten=True, newsize=(128, 128)):
    """Load the image at path *DIR*, resize it to *newsize*, and return it
    as a numpy array (height x width x channels).

    Note: *flatten* is accepted for call-site compatibility but is IGNORED —
    the array is always returned un-flattened, which is what the rest of the
    pipeline expects.
    """
    # Image.open is lazy and keeps the file handle open; the context manager
    # guarantees it is closed (fixes a file-handle leak over ~110k images).
    with Image.open(DIR) as img:
        return np.asarray(img.resize(newsize))
Importing the dataset, resizing the images and reading the labels.
TEST_DIR = "/users/sbataju/CS5324/tomato/val/"

# Map class index -> folder name (and the inverse) from the listing order.
label = {}
index = 0
for FOLDER in os.listdir(TEST_DIR):
    # print(FOLDER, index)
    label[index] = FOLDER
    index += 1
inv_label = {v: k for k, v in label.items()}

NUM_CLASSES = 10

# Collect every test image together with its class-folder name.
index = 0
data = []
for FOLDER in os.listdir(TEST_DIR):
    print(FOLDER, inv_label[FOLDER])
    folder_path = TEST_DIR + FOLDER + "/"
    for image_dir in os.listdir(TEST_DIR + FOLDER):
        data.append({"x": img_to_np(folder_path + image_dir),
                     "y": label[inv_label[FOLDER]]})
    index += 1

# Split the records into arrays: images and numeric class labels.
x = [obj["x"] for obj in data]
y = [obj["y"] for obj in data]
x_test = np.array(x)
y_test = np.array([inv_label[i] for i in y], dtype=np.float16)
TRAIN_DIR = "/users/sbataju/CS5324/tomato/train/"

# Load every training image together with its class-folder name.
index = 0
data = []
for FOLDER in os.listdir(TRAIN_DIR):
    print(FOLDER, ' ', inv_label[FOLDER])
    folder_path = TRAIN_DIR + FOLDER + "/"
    for image_dir in os.listdir(TRAIN_DIR + FOLDER):
        data.append({"x": img_to_np(folder_path + image_dir, flatten=False),
                     "y": label[inv_label[FOLDER]]})
    index += 1

x = [obj["x"] for obj in data]
y = [obj["y"] for obj in data]
x_train = np.array(x)
y_train = np.array([inv_label[i] for i in y], dtype=np.float16)

# One-hot encodings for the categorical-crossentropy loss.
y_train_ohe = keras.utils.to_categorical(y_train, NUM_CLASSES)
y_test_ohe = keras.utils.to_categorical(y_test, NUM_CLASSES)
Plotting random 25 images.
# Show a 5x5 grid of randomly chosen images with their class names.
plt.figure(figsize=(15, 15))
for plot_pos in range(1, 26):
    sample = np.random.randint(0, len(y))
    plt.subplot(5, 5, plot_pos)
    plt.imshow(x[sample])
    # folder names look like "Tomato___Some_disease"; drop the crop prefix
    plt.title(" ".join(y[sample].split('_')[1:]))
    plt.grid()
    # print(plot_pos)
plt.show()
Split the train set into train and validation datasets. As stated before, 10% of the total train data is used as validation, which is the same size as the test set. Throughout the model testing below, this split is used, partly motivated because it serves as a baseline for many variations and changes; it would also be difficult to keep track of Cross Validation if CV were used in all of them.
from sklearn.model_selection import train_test_split

# Hold out 10% of the training data (stratified per class) as a validation
# set — the same size as the test set.
X_train, X_val, Y_train, Y_val = train_test_split(
    x_train,
    y_train_ohe,
    test_size=0.1,
    random_state=42,
    stratify=y_train,
)
label
Here the number of images in each class can be seen for each dataset. All the classes have an equal number of images, and the size of each dataset can be seen as well.
# Per-class image counts for each split (all bars should be equal height).
for targets, split_name in ((y_test_ohe, 'Testing'),
                            (Y_val, 'Validation'),
                            (Y_train, 'Training')):
    plt.hist(np.argmax(targets, axis=-1), histtype='bar', ec='black',
             orientation="horizontal")
    plt.yticks(list(label.keys()), list(label.values()))
    plt.title(f' Number of images for {split_name}')
    plt.show()
Here all the dataset are normalized then, data augmentation module is fitted giving some rotation, shifts, zoom, etc.
# Scale pixel values into [0, 1].
X_train, X_val = X_train / 255.0, X_val / 255.0
x_test = x_test / 255.0
NUM_CLASSES = 10

# Identical augmentation settings are used for the train and validation
# generators, so build them from one shared configuration.
_augment_kwargs = dict(
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    rotation_range=180,      # used, Int. Degree range for random rotations.
    width_shift_range=0.2,   # used, Float (fraction of total width). Range for random horizontal shifts.
    height_shift_range=0.2,  # used, Float (fraction of total height). Range for random vertical shifts.
    shear_range=0.1,         # Float. Shear Intensity (Shear angle in counter-clockwise direction as radians)
    zoom_range=0.4,
    channel_shift_range=0.,
    fill_mode='nearest',
    cval=0.,
    # brightness_range=[0.2, 1],
    horizontal_flip=True,
    vertical_flip=True,
    rescale=None,
)
datagen_train = ImageDataGenerator(**_augment_kwargs)
datagen_val = ImageDataGenerator(**_augment_kwargs)

img_wh = 128
# fit() only computes statistics for the featurewise options (disabled here),
# but is kept to match the original pipeline.
datagen_train.fit(X_train)
datagen_val.fit(X_val)
Plotting augmented images
# Preview augmented batches: the three images of a batch go into subplots,
# and the title shows the class of the first sample in the batch.
tmps = datagen_train.flow(X_train, Y_train, batch_size=3)
for batch_no, batch in enumerate(tmps):
    for part_no, part in enumerate(batch):
        if part_no == 0:
            # part is the image batch
            for img_no, aug_img in enumerate(part):
                plt.subplot(1, 3, img_no + 1)
                plt.imshow(aug_img)
                plt.grid()
        else:
            # part is the one-hot label batch; the flat argmax lands on the
            # first sample's class index
            plt.title(label[np.argmax(part)])
    if batch_no == 10:
        break
plt.show()
Let's copy and build the example CNN architectures given in our class notebook.
First, a very simple CNN with only two Convolution layer on top with first layer with 32 filters and kernel of 3x3 and second layer with 64 filters and kernel of 3x3. Then, a max pool layer with kernal size of 2x2, 25% Drop out, a flatten layer, 128 node dense layer and 50% drop out layer and finally a softmax output layer.
I was having trouble plotting the models in this environment so instead I have the screenshots.
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

# Screenshot of the architecture (plot_model did not work in this environment).
fig = plt.figure(figsize=(60, 2))
img = mpimg.imread('cnn.png')
plt.imshow(img)
plt.show()
%%time
# Simple AlexNet-style CNN: Conv(32) -> Conv(64) -> MaxPool -> Dropout(0.25)
# -> Flatten -> Dense(128) -> Dropout(0.5) -> softmax(NUM_CLASSES).
cnn = Sequential()
cnn.add(Conv2D(filters=32,
               input_shape=(img_wh, img_wh, 3),
               kernel_size=(3, 3),
               padding='same',
               activation='relu', data_format="channels_last"))  # more compact syntax
# no max pool before next conv layer!!
cnn.add(Conv2D(filters=64,
               kernel_size=(3, 3),
               padding='same',
               activation='relu'))  # more compact syntax
cnn.add(MaxPooling2D(pool_size=(2, 2), data_format="channels_last"))
cnn.add(Dropout(0.25))  # dropout for regularization after conv layers
cnn.add(Flatten())
cnn.add(Dense(128, activation='relu'))
cnn.add(Dropout(0.5))   # dropout for regularization, again!
cnn.add(Dense(NUM_CLASSES, activation='softmax'))

# Let's train the model
cnn.compile(loss='categorical_crossentropy',  # 'categorical_crossentropy' 'mean_squared_error'
            optimizer='rmsprop',              # 'adadelta' 'rmsprop'
            metrics=['accuracy'])
cnn.summary()

# the flow method yields batches of images indefinitely, with the given transformations
BS = 32
epochs = 10
# FIX: Model.fit accepts generators directly since TF 2.1; fit_generator is
# deprecated and was removed in TF >= 2.6.
history = cnn.fit(datagen_train.flow(X_train, Y_train, batch_size=BS),
                  steps_per_epoch=len(X_train) // BS,  # batches per epoch
                  epochs=epochs, verbose=1,
                  validation_data=datagen_val.flow(X_val, Y_val, batch_size=BS),
                  validation_steps=len(X_val) // BS,
                  callbacks=[EarlyStopping(monitor='val_loss', patience=3)],
                  )
For the first try, not bad at all: the accuracy is decent, there is room for improvement, and at least there is no vanishing gradient. The validation set has higher accuracy, indicating that our model is underfitting and we should add more complexity to the model. We can see the confusion matrix for unseen test data is performing similarly in terms of accuracy, and the training loss is trending down. When looking at the confusion matrix we can read the numbers as percentages, as there are 100 test events.
plot_history(history)

# Accuracy + confusion matrix on the unseen test set.
yhat = np.argmax(cnn.predict(x_test), axis=1)
acc = mt.accuracy_score(y_test, yhat)
cm = mt.confusion_matrix(y_test, yhat)
plot_confusion_matrix(cm, list(label.values()), title=f'CM Test Acc:{acc:0.3f}')

# Same evaluation on the (un-augmented) training set.
train_true = np.argmax(Y_train, axis=-1)
yhat = np.argmax(cnn.predict(X_train), axis=1)
acc = mt.accuracy_score(train_true, yhat)
cm = mt.confusion_matrix(train_true, yhat)
plot_confusion_matrix(cm, list(label.values()), title=f'CM Train Acc:{acc:0.3f}')

# And on the validation set.
val_true = np.argmax(Y_val, axis=-1)
yhat = np.argmax(cnn.predict(X_val), axis=1)
acc = mt.accuracy_score(val_true, yhat)
cm = mt.confusion_matrix(val_true, yhat)
plot_confusion_matrix(cm, list(label.values()), title=f'CM Val Acc: {acc:0.3f}')
The number of images in all the classes is equal, so using accuracy as the metric performs nicely. The goal to perform multiclass classification is also achieved, as seen in the confusion matrix above. Using the accuracy metric will ensure that the optimization maximizes the diagonal elements of the confusion matrix.
Now, let's try a bit complex network: two initial convolution layer with 32 filters, kernal of 3x3 then a max pool layer of 2x2 then two convolution layer with 64 filters and kernal of 3x3 and a max pool layer of 2x2 then two convolution layer of 128 filters then a max pool, flatten, dropout in bewteen a dense layer and finally a output layer.
# Architecture screenshot for the deeper six-conv-layer CNN.
fig = plt.figure(figsize=(60, 2))
img = mpimg.imread('cnn11.png')
plt.imshow(img)
plt.show()
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2

l2_lambda = 0.0001

# Use Kaiming He to regularize ReLU layers: https://arxiv.org/pdf/1502.01852.pdf
# Use Glorot/Bengio for linear/sigmoid/softmax: http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf
# VGG-style stack: 2x Conv(32) -> pool -> 2x Conv(64) -> pool -> 2x Conv(128)
# -> pool -> Flatten -> Dropout -> Dense(128) -> Dropout -> softmax.
# NOTE: the redundant `input_shape` kwargs on non-first layers were removed —
# Keras ignores them everywhere except on the first layer.
cnn1 = Sequential()
cnn1.add(Conv2D(filters=32,
                input_shape=(img_wh, img_wh, 3),
                kernel_size=(3, 3),
                kernel_initializer='he_uniform',
                kernel_regularizer=l2(l2_lambda),
                padding='same',
                activation='relu',
                data_format="channels_last"))
cnn1.add(Conv2D(filters=32,
                kernel_size=(3, 3),
                kernel_initializer='he_uniform',
                kernel_regularizer=l2(l2_lambda),
                padding='same',
                activation='relu', data_format="channels_last"))
cnn1.add(MaxPooling2D(pool_size=(2, 2), data_format="channels_last"))
cnn1.add(Conv2D(filters=64,
                kernel_size=(3, 3),
                kernel_initializer='he_uniform',
                kernel_regularizer=l2(l2_lambda),
                padding='same',
                activation='relu', data_format="channels_last"))
cnn1.add(Conv2D(filters=64,
                kernel_size=(3, 3),
                kernel_initializer='he_uniform',
                kernel_regularizer=l2(l2_lambda),
                padding='same',
                activation='relu'))
cnn1.add(MaxPooling2D(pool_size=(2, 2), data_format="channels_last"))
cnn1.add(Conv2D(filters=128,
                kernel_size=(3, 3),
                kernel_initializer='he_uniform',
                kernel_regularizer=l2(l2_lambda),
                padding='same',
                activation='relu', data_format="channels_last"))
cnn1.add(Conv2D(filters=128,
                kernel_size=(3, 3),
                kernel_initializer='he_uniform',
                kernel_regularizer=l2(l2_lambda),
                padding='same',
                activation='relu', data_format="channels_last"))
cnn1.add(MaxPooling2D(pool_size=(2, 2), data_format="channels_last"))
cnn1.add(Flatten())
cnn1.add(Dropout(0.25))  # dropout for regularization after conv layers
cnn1.add(Dense(128,
               activation='relu',
               kernel_initializer='he_uniform',
               kernel_regularizer=l2(l2_lambda)))
cnn1.add(Dropout(0.5))   # dropout for regularization, again!
cnn1.add(Dense(NUM_CLASSES,
               activation='softmax',
               kernel_initializer='glorot_uniform',
               kernel_regularizer=l2(l2_lambda)))

# Let's train the model
cnn1.compile(loss='categorical_crossentropy',  # 'categorical_crossentropy' 'mean_squared_error'
             optimizer='rmsprop',              # 'adadelta' 'rmsprop'
             metrics=['accuracy'])
cnn1.summary()

BS = 32
epochs = 10
# FIX: fit() accepts generators; fit_generator was removed in TF >= 2.6.
history = cnn1.fit(datagen_train.flow(X_train, Y_train, batch_size=BS),
                   steps_per_epoch=len(X_train) // BS,
                   epochs=epochs, verbose=1,
                   validation_data=datagen_val.flow(X_val, Y_val, batch_size=BS),
                   validation_steps=len(X_val) // BS,
                   callbacks=[EarlyStopping(monitor='val_loss', patience=4)],
                   )
plot_history(history)

# Test-set accuracy and confusion matrix for the deeper CNN.
yhat = np.argmax(cnn1.predict(x_test), axis=1)
acc = mt.accuracy_score(y_test, yhat)
cm = mt.confusion_matrix(y_test, yhat)
plot_confusion_matrix(cm, list(label.values()), title=f"CM Test Acc:{acc}")

# Training set.
train_true = np.argmax(Y_train, axis=-1)
yhat = np.argmax(cnn1.predict(X_train), axis=1)
acc = mt.accuracy_score(train_true, yhat)
cm = mt.confusion_matrix(train_true, yhat)
plot_confusion_matrix(cm, list(label.values()), title=f'CM Train Acc:{acc:0.3f}')

# Validation set.
val_true = np.argmax(Y_val, axis=-1)
yhat = np.argmax(cnn1.predict(X_val), axis=1)
acc = mt.accuracy_score(val_true, yhat)
cm = mt.confusion_matrix(val_true, yhat)
plot_confusion_matrix(cm, list(label.values()), title=f'CM Val Acc: {acc:0.3f}')
There is almost a 4% jump in the accuracy overall compared to the simple CNN. From the confusion matrix we can see only a couple of classes are being misclassified, for example Late blight being predicted as Early blight. And the accuracy is the same across all the datasets; the difference we see between the epoch-vs-accuracy plot and the confusion matrix could be because the epoch-vs-accuracy plot is made using the augmented images, whereas the confusion matrix uses the original images.
This network is a combination of the above simple 6-layer CNNs. After the two initial layers I have three branches with 32- and 64-filter convolutions sandwiched between max-pool layers; then all the outputs are concatenated and the output prediction is made.
# Architecture screenshot for the three-branch ensemble-style network.
fig = plt.figure(figsize=(60, 6))
img = mpimg.imread('cnn_ens1.png')
plt.imshow(img)
plt.show()
%%time
from tensorflow.keras.layers import Input, average, concatenate
from tensorflow.keras.models import Model

num_ensembles = 3
l2_lambda = 0.000001

input_holder = Input(shape=(img_wh, img_wh, 3))

# Shared stem: two 3x3 conv layers followed by a 2x2 max pool.
# NOTE: the ignored `input_shape` kwargs on functional-API layers were removed.
x = Conv2D(filters=32,
           kernel_size=(3, 3),
           kernel_initializer='he_uniform',
           kernel_regularizer=l2(l2_lambda),
           padding='same',
           activation='relu', data_format="channels_last")(input_holder)
x = Conv2D(filters=32,
           kernel_size=(3, 3),
           kernel_initializer='he_uniform',
           kernel_regularizer=l2(l2_lambda),
           padding='same',
           activation='relu')(x)
input_conv = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)

# Three parallel NiN-style (MLPConv) branches fed from the shared stem.
branches = []
for _ in range(num_ensembles):
    x = Conv2D(filters=32,
               kernel_size=(3, 3),
               kernel_initializer='he_uniform',
               kernel_regularizer=l2(l2_lambda),
               padding='same',
               activation='linear', data_format="channels_last")(input_conv)
    x = Conv2D(filters=32,
               kernel_size=(1, 1),
               kernel_initializer='he_uniform',
               kernel_regularizer=l2(l2_lambda),
               padding='same',
               activation='relu', data_format="channels_last")(x)
    x = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)
    x = Conv2D(filters=64,
               kernel_size=(3, 3),
               kernel_initializer='he_uniform',
               kernel_regularizer=l2(l2_lambda),
               padding='same',
               activation='linear', data_format="channels_last")(x)
    x = Conv2D(filters=64,
               kernel_size=(1, 1),
               kernel_initializer='he_uniform',
               kernel_regularizer=l2(l2_lambda),
               padding='same',
               activation='linear', data_format="channels_last")(x)
    x = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)
    # per-branch classifier tail
    x = Flatten()(x)
    x = Dropout(0.50)(x)
    x = Dense(64,
              activation='relu',
              kernel_initializer='he_uniform',
              kernel_regularizer=l2(l2_lambda))(x)
    x = Dense(NUM_CLASSES,
              activation='relu',
              kernel_initializer='he_uniform',
              kernel_regularizer=l2(l2_lambda))(x)
    branches.append(x)

# Merge the branch outputs and classify with a softmax head.
x = concatenate(branches)
x = Dense(NUM_CLASSES,
          activation='softmax',
          kernel_initializer='glorot_uniform',
          kernel_regularizer=l2(l2_lambda))(x)

# Functional API: tie the input and output tensors into a Model.
cnn_ens = Model(inputs=input_holder, outputs=x)
cnn_ens.summary()
cnn_ens.compile(loss='categorical_crossentropy',  # 'categorical_crossentropy' 'mean_squared_error'
                optimizer='adam',                 # 'adadelta' 'rmsprop'
                metrics=['accuracy'])

BS = 32
epochs = 10
# FIX: fit() accepts generators; fit_generator was removed in TF >= 2.6.
history = cnn_ens.fit(datagen_train.flow(X_train, Y_train, batch_size=BS),
                      steps_per_epoch=len(X_train) // BS,
                      epochs=epochs, verbose=1,
                      validation_data=datagen_val.flow(X_val, Y_val, batch_size=BS),
                      validation_steps=len(X_val) // BS,
                      callbacks=[EarlyStopping(monitor='val_loss', patience=4)],
                      )
plot_history(history)

# Test-set accuracy and confusion matrix for the ensemble-style network.
yhat = np.argmax(cnn_ens.predict(x_test), axis=1)
acc = mt.accuracy_score(y_test, yhat)
cm = mt.confusion_matrix(y_test, yhat)
plot_confusion_matrix(cm, list(label.values()), title=f"CM Test Acc:{acc}")
Either the default hyperparameters are not working or the architecture is not suitable; either way I will leave it and move on to other techniques.
A Resnet style connection for middle convolution layers.
# Architecture screenshot for the ResNet-style network.
fig = plt.figure(figsize=(60, 6))
img = mpimg.imread('resnet1.png')
plt.imshow(img)
plt.show()
%%time
from tensorflow.keras.layers import Add, Input
from tensorflow.keras.layers import average, concatenate
from tensorflow.keras.models import Model

input_holder = Input(shape=(img_wh, img_wh, 3))

# Stem: conv -> pool -> conv -> pool; x_split is the residual branch input.
# NOTE: the ignored `input_shape` kwarg on the functional-API layer was removed.
x = Conv2D(filters=32,
           kernel_size=(3, 3),
           kernel_initializer='he_uniform',
           kernel_regularizer=l2(l2_lambda),
           padding='same',
           activation='relu',
           data_format="channels_last")(input_holder)
x = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)
x = Conv2D(filters=32,
           kernel_size=(3, 3),
           kernel_initializer='he_uniform',
           kernel_regularizer=l2(l2_lambda),
           padding='same',
           activation='relu',
           data_format="channels_last")(x)
x_split = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)

# Bottleneck: 1x1 expand to 64 -> 3x3 -> 1x1 project back to 32 channels so
# the residual addition below has matching shapes.
x = Conv2D(filters=64,
           kernel_size=(1, 1),
           kernel_initializer='he_uniform',
           kernel_regularizer=l2(l2_lambda),
           padding='same',
           activation='relu',
           data_format="channels_last")(x_split)
x = Conv2D(filters=64,
           kernel_size=(3, 3),
           kernel_initializer='he_uniform',
           kernel_regularizer=l2(l2_lambda),
           padding='same',
           activation='relu',
           data_format="channels_last")(x)
x = Conv2D(filters=32,
           kernel_size=(1, 1),
           kernel_initializer='he_uniform',
           kernel_regularizer=l2(l2_lambda),
           padding='same',
           activation='relu',
           data_format="channels_last")(x)
# now add back in the split layer, x_split (residual added in)
x = Add()([x, x_split])
x = Activation("relu")(x)
x = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)

# Classifier head.
x = Flatten()(x)
x = Dropout(0.25)(x)
x = Dense(256)(x)
x = Activation("relu")(x)
x = Dropout(0.5)(x)
x = Dense(NUM_CLASSES)(x)
x = Activation('softmax')(x)

resnet = Model(inputs=input_holder, outputs=x)
resnet.summary()
resnet.compile(loss='categorical_crossentropy',  # 'categorical_crossentropy' 'mean_squared_error'
               optimizer='adam',                 # 'adadelta' 'rmsprop'
               metrics=['accuracy'])

BS = 32
epochs = 20
# FIX: fit() accepts generators; fit_generator was removed in TF >= 2.6.
history = resnet.fit(datagen_train.flow(X_train, Y_train, batch_size=BS),
                     steps_per_epoch=len(X_train) // BS,
                     epochs=epochs, verbose=1,
                     validation_data=datagen_val.flow(X_val, Y_val, batch_size=BS),
                     validation_steps=len(X_val) // BS,
                     callbacks=[EarlyStopping(monitor='val_loss', patience=4)],
                     )
plot_history(history)

# Test set.
yhat = np.argmax(resnet.predict(x_test), axis=1)
acc = mt.accuracy_score(y_test, yhat)
cm = mt.confusion_matrix(y_test, yhat)
plot_confusion_matrix(cm, list(label.values()), title=f"CM Test Acc:{acc}")

# Training set.
train_true = np.argmax(Y_train, axis=-1)
yhat = np.argmax(resnet.predict(X_train), axis=1)
acc = mt.accuracy_score(train_true, yhat)
cm = mt.confusion_matrix(train_true, yhat)
plot_confusion_matrix(cm, list(label.values()), title=f'CM Train Acc:{acc:0.3f}')

# Validation set.
val_true = np.argmax(Y_val, axis=-1)
yhat = np.argmax(resnet.predict(X_val), axis=1)
acc = mt.accuracy_score(val_true, yhat)
cm = mt.confusion_matrix(val_true, yhat)
plot_confusion_matrix(cm, list(label.values()), title=f'CM Val Acc: {acc:0.3f}')
Couple of percentage points off from the 6 layer CNN but still good results.
Two separable convolution layer in between.
# Architecture screenshot for the Xception-style network.
fig = plt.figure(figsize=(60, 6))
img = mpimg.imread('xception1.png')
plt.imshow(img)
plt.show()
# Xception style architecture
from tensorflow.keras.layers import SeparableConv2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Add, Input
from tensorflow.keras.layers import average, concatenate
from tensorflow.keras.models import Model

l2_lambda = 0.000001

input_holder = Input(shape=(img_wh, img_wh, 3))

# Stem: conv -> pool -> conv -> pool.
# NOTE: the ignored `input_shape` kwargs on functional-API layers were removed.
x = Conv2D(filters=32,
           kernel_size=(3, 3),
           kernel_initializer='he_uniform',
           kernel_regularizer=l2(l2_lambda),
           padding='same',
           activation='relu',
           data_format="channels_last")(input_holder)
x = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)
x = Conv2D(filters=32,
           kernel_size=(3, 3),
           kernel_initializer='he_uniform',
           kernel_regularizer=l2(l2_lambda),
           padding='same',
           activation='relu',
           data_format="channels_last")(x)
x_split = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)

# First separable-conv residual block.
x = SeparableConv2D(filters=32,
                    kernel_size=(3, 3),
                    kernel_initializer='he_uniform',
                    kernel_regularizer=l2(l2_lambda),
                    padding='same',
                    activation='relu',
                    depth_multiplier=1,  # controls output channels
                    data_format="channels_last")(x_split)
x_split = Add()([x, x_split])

# Second separable-conv residual block.
x = SeparableConv2D(filters=32,
                    kernel_size=(3, 3),
                    kernel_initializer='he_uniform',
                    kernel_regularizer=l2(l2_lambda),
                    padding='same',
                    activation='relu',
                    depth_multiplier=1,  # controls output channels
                    data_format="channels_last")(x_split)
x_split = Add()([x, x_split])

# Classifier head.
x = Activation("relu")(x_split)
x = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)
x = Flatten()(x)
x = Dropout(0.25)(x)
x = Dense(256, activation="relu")(x)
x = Dropout(0.5)(x)
x = Dense(NUM_CLASSES, activation="softmax")(x)

xception = Model(inputs=input_holder, outputs=x)
xception.summary()
xception.compile(loss='categorical_crossentropy',  # 'categorical_crossentropy' 'mean_squared_error'
                 optimizer='adam',                 # 'adadelta' 'rmsprop'
                 metrics=['accuracy'])

BS = 32
epochs = 50
# FIX: fit() accepts generators; fit_generator was removed in TF >= 2.6.
history = xception.fit(datagen_train.flow(X_train, Y_train, batch_size=BS),
                       steps_per_epoch=len(X_train) // BS,
                       epochs=epochs, verbose=1,
                       validation_data=datagen_val.flow(X_val, Y_val, batch_size=BS),
                       validation_steps=len(X_val) // BS,
                       callbacks=[EarlyStopping(monitor='val_loss', patience=4)],
                       )
plot_history(history)

# Test-set accuracy and confusion matrix for the Xception-style network.
yhat = np.argmax(xception.predict(x_test), axis=1)
acc = mt.accuracy_score(y_test, yhat)
cm = mt.confusion_matrix(y_test, yhat)
plot_confusion_matrix(cm, list(label.values()), title=f"CM Test Acc:{acc}")
I had trained the Resnet and Xception for 50 epochs, but when I reran the notebook I had changed the Resnet network to 10 epochs.
Xception is also working properly, but for now let's focus on Resnet.
Changed the Drop out percentage from 25,50 to 85,85.
%%time
# Same ResNet-style network as above, but with dropout raised to 0.85/0.85
# to observe the effect of heavy dropout on learning.
from tensorflow.keras.layers import Add, Input
from tensorflow.keras.layers import average, concatenate
from tensorflow.keras.models import Model

input_holder = Input(shape=(img_wh, img_wh, 3))

# Stem: conv -> pool -> conv -> pool; x_split is the residual branch input.
# NOTE: the ignored `input_shape` kwarg on the functional-API layer was removed.
x = Conv2D(filters=32,
           kernel_size=(3, 3),
           kernel_initializer='he_uniform',
           kernel_regularizer=l2(l2_lambda),
           padding='same',
           activation='relu',
           data_format="channels_last")(input_holder)
x = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)
x = Conv2D(filters=32,
           kernel_size=(3, 3),
           kernel_initializer='he_uniform',
           kernel_regularizer=l2(l2_lambda),
           padding='same',
           activation='relu',
           data_format="channels_last")(x)
x_split = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)

# Bottleneck: 1x1 expand -> 3x3 -> 1x1 project back to 32 channels.
x = Conv2D(filters=64,
           kernel_size=(1, 1),
           kernel_initializer='he_uniform',
           kernel_regularizer=l2(l2_lambda),
           padding='same',
           activation='relu',
           data_format="channels_last")(x_split)
x = Conv2D(filters=64,
           kernel_size=(3, 3),
           kernel_initializer='he_uniform',
           kernel_regularizer=l2(l2_lambda),
           padding='same',
           activation='relu',
           data_format="channels_last")(x)
x = Conv2D(filters=32,
           kernel_size=(1, 1),
           kernel_initializer='he_uniform',
           kernel_regularizer=l2(l2_lambda),
           padding='same',
           activation='relu',
           data_format="channels_last")(x)
# now add back in the split layer, x_split (residual added in)
x = Add()([x, x_split])
x = Activation("relu")(x)
x = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)

# Classifier head with very aggressive dropout.
x = Flatten()(x)
x = Dropout(0.85)(x)
x = Dense(256)(x)
x = Activation("relu")(x)
x = Dropout(0.85)(x)
x = Dense(NUM_CLASSES)(x)
x = Activation('softmax')(x)

resnet1 = Model(inputs=input_holder, outputs=x)
resnet1.summary()
resnet1.compile(loss='categorical_crossentropy',  # 'categorical_crossentropy' 'mean_squared_error'
                optimizer='adam',                 # 'adadelta' 'rmsprop'
                metrics=['accuracy'])

BS = 32
epochs = 50
# FIX: fit() accepts generators; fit_generator was removed in TF >= 2.6.
history = resnet1.fit(datagen_train.flow(X_train, Y_train, batch_size=BS),
                      steps_per_epoch=len(X_train) // BS,
                      epochs=epochs, verbose=1,
                      validation_data=datagen_val.flow(X_val, Y_val, batch_size=BS),
                      validation_steps=len(X_val) // BS,
                      callbacks=[EarlyStopping(monitor='val_loss', patience=4)],
                      )
plot_history(history)

# Test-set accuracy and confusion matrix for the high-dropout ResNet variant.
yhat = np.argmax(resnet1.predict(x_test), axis=1)
acc = mt.accuracy_score(y_test, yhat)
cm = mt.confusion_matrix(y_test, yhat)
plot_confusion_matrix(cm, list(label.values()), title=f"CM Test Acc:{acc}")
The goal of changing the dropout rate was to see its effect on learning; we can see that if the dropout is increased then there is more variation in the learning curve itself (more spikes), but it does not change the underfitting we are seeing.
Here I am testing the change of the depth multiplier in the separable convolution layers and the Adam optimizer parameters.
# Xception 2
def build_net(depth_multiplier, learning_rate, beta_1, beta_2, epsilon, l2_lambda, dropout):
    """Build and compile an Xception-style CNN for hyperparameter testing.

    Args:
        depth_multiplier: depth multiplier for the separable conv layers
            (controls their output channels).
        learning_rate, beta_1, beta_2, epsilon: Adam optimizer parameters.
        l2_lambda: L2 weight-decay coefficient for all regularized layers.
        dropout: dropout rate used in all three dropout layers of the head.

    Returns:
        A compiled tf.keras Model expecting (img_wh, img_wh, 3) inputs.
    """
    input_holder = Input(shape=(img_wh, img_wh, 3))

    # Stem: conv -> pool -> conv.
    x = Conv2D(filters=32,
               input_shape=(img_wh, img_wh, 3),  # lets keep the 3 channels
               kernel_size=(3, 3),
               kernel_initializer='he_uniform',
               kernel_regularizer=l2(l2_lambda),
               padding='same',
               activation='relu',
               data_format="channels_last")(input_holder)
    x = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)
    x = Conv2D(filters=32,
               kernel_size=(3, 3),
               kernel_initializer='he_uniform',
               kernel_regularizer=l2(l2_lambda),
               padding='same',
               activation='relu',
               data_format="channels_last")(x)

    # First residual block; the (1, 1) pool is a no-op kept from the original.
    x_split = MaxPooling2D(pool_size=(1, 1), data_format="channels_last")(x)
    x_split = SeparableConv2D(filters=32,
                              input_shape=(img_wh, img_wh, 3),
                              kernel_size=(3, 3),
                              kernel_initializer='he_uniform',
                              kernel_regularizer=l2(l2_lambda),
                              padding='same',
                              activation='relu',
                              depth_multiplier=depth_multiplier,  # controls output channels
                              data_format="channels_last")(x_split)
    x = Add()([x, x_split])

    # Second residual block with an extra activation on the branch.
    x_split = MaxPooling2D(pool_size=(1, 1), data_format="channels_last")(x)
    x_split = Activation("relu")(x_split)
    x_split = SeparableConv2D(filters=32,
                              input_shape=(img_wh, img_wh, 1),
                              kernel_size=(3, 3),
                              kernel_initializer='he_uniform',
                              kernel_regularizer=l2(l2_lambda),
                              padding='same',
                              activation='relu',
                              depth_multiplier=depth_multiplier,  # controls output channels
                              data_format="channels_last")(x_split)
    x = Add()([x, x_split])

    # Classifier head: 64 -> 32 -> softmax with dropout between each layer.
    x = Activation("relu")(x)
    x = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)
    x = Flatten()(x)
    x = Dropout(dropout)(x)
    x = Dense(64, activation="relu")(x)
    x = Dropout(dropout)(x)
    x = Dense(32, activation="relu")(x)
    x = Dropout(dropout)(x)
    x = Dense(NUM_CLASSES, activation="softmax")(x)

    xception2 = Model(inputs=input_holder, outputs=x)
    xception2.compile(loss='categorical_crossentropy',  # 'categorical_crossentropy' 'mean_squared_error'
                      optimizer=tf.keras.optimizers.Adam(
                          learning_rate=learning_rate,
                          beta_1=beta_1,
                          beta_2=beta_2,
                          epsilon=epsilon,
                          # amsgrad=False,
                          name="Adam",),  # 'adadelta' 'rmsprop'
                      metrics=['accuracy'])
    return xception2
# xception.summary()
# Build with a depth multiplier of 3 and explicit Adam hyperparameters.
xception2 = build_net(depth_multiplier=3, learning_rate=0.001, beta_1=0.9,
                      beta_2=0.999, epsilon=0.00001, l2_lambda=0.000001, dropout=0.5)
xception2.summary()

BS = 32
epochs = 10
# FIX: fit() accepts generators; fit_generator was removed in TF >= 2.6.
history = xception2.fit(datagen_train.flow(X_train, Y_train, batch_size=BS),
                        steps_per_epoch=len(X_train) // BS,
                        epochs=epochs, verbose=1,
                        validation_data=datagen_val.flow(X_val, Y_val, batch_size=BS),
                        validation_steps=len(X_val) // BS,
                        callbacks=[EarlyStopping(monitor='val_loss', patience=4)],
                        )
plot_history(history)

# Test-set accuracy and confusion matrix.
yhat = np.argmax(xception2.predict(x_test), axis=1)
acc = mt.accuracy_score(y_test, yhat)
cm = mt.confusion_matrix(y_test, yhat)
plot_confusion_matrix(cm, list(label.values()), title=f"CM Test Acc:{acc}")
Just testing a random combination of hyperparameters; at least the loss is decreasing.
The motivation to change the image size is because AlexNet[2] uses images of size 224x224x3, so I decided that since I am testing an AlexNet-style network I will use the same size images.
TEST_DIR = "/users/sbataju/CS5324/tomato/val/"
NUM_CLASSES = 10

# Reload the test set resized to 224x224 (AlexNet-sized inputs).
index = 0
data = []
for FOLDER in os.listdir(TEST_DIR):
    print(FOLDER, inv_label[FOLDER])
    for image_dir in os.listdir(TEST_DIR + FOLDER):
        data.append({"x": img_to_np(TEST_DIR + FOLDER + "/" + image_dir,
                                    flatten=False, newsize=(224, 224)),
                     "y": label[inv_label[FOLDER]]})
    index += 1
x = [obj["x"] for obj in data]
y = [obj["y"] for obj in data]
x_test1 = np.array(x)
y_test1 = np.array([inv_label[i] for i in y], dtype=np.float16)

TRAIN_DIR = "/users/sbataju/CS5324/tomato/train/"
# Reload the training set at 224x224.
index = 0
data = []
for FOLDER in os.listdir(TRAIN_DIR):
    print(FOLDER, ' ', inv_label[FOLDER])
    for image_dir in os.listdir(TRAIN_DIR + FOLDER):
        data.append({"x": img_to_np(TRAIN_DIR + FOLDER + "/" + image_dir,
                                    flatten=False, newsize=(224, 224)),
                     "y": label[inv_label[FOLDER]]})
    index += 1
x = [obj["x"] for obj in data]
y = [obj["y"] for obj in data]
x_train1 = np.array(x)
y_train1 = np.array([inv_label[i] for i in y], dtype=np.float16)

# BUG FIX: one-hot encode the labels of THIS (224x224) load — y_train1 /
# y_test1 — instead of the earlier 128x128 labels y_train / y_test, and
# stratify the split on y_train1. The old code only worked by coincidence
# (identical os.listdir ordering between the two loads).
y_train_ohe1 = keras.utils.to_categorical(y_train1, NUM_CLASSES)
y_test_ohe1 = keras.utils.to_categorical(y_test1, NUM_CLASSES)

from sklearn.model_selection import train_test_split
X_train1, X_val1, Y_train1, Y_val1 = train_test_split(
    x_train1, y_train_ohe1, test_size=0.1, random_state=42, stratify=y_train1)

# Scale pixel values into [0, 1].
X_train1, X_val1 = X_train1 / 255.0, X_val1 / 255.0
x_test1 = x_test1 / 255.0
# Augmentation pipeline for the 224x224 training set: random rotations up to
# 180 degrees, 20% shifts, mild shear and both flips. All featurewise /
# samplewise normalisation options are disabled (pixels are pre-scaled to
# [0, 1] above), so no rescaling happens here.
datagen_train1 = ImageDataGenerator(
featurewise_center=False,
samplewise_center=False,
featurewise_std_normalization=False,
samplewise_std_normalization=False,
zca_whitening=False,
rotation_range=180, # used, Int. Degree range for random rotations.
width_shift_range=0.2, # used, Float (fraction of total width). Range for random horizontal shifts.
height_shift_range=0.2, # used, Float (fraction of total height). Range for random vertical shifts.
shear_range=0.1, # Float. Shear Intensity (Shear angle in counter-clockwise direction as radians)
zoom_range=0.0,
channel_shift_range=0.,
fill_mode='nearest',
cval=0.,
# brightness_range=[0.2,1],
horizontal_flip=True,
vertical_flip=True,
rescale=None)
# Validation generator. BUG FIX: the validation data was previously run
# through the same heavy augmentation (180-degree rotations, shifts, flips)
# as the training data, which distorts the validation metrics used for early
# stopping and model comparison. Validation batches are now served unchanged.
datagen_val1 = ImageDataGenerator()
# fit() only computes statistics when featurewise options are enabled; both
# generators have them disabled, so these calls are harmless no-ops kept for
# parity with the original workflow.
datagen_train1.fit(X_train1)
datagen_val1.fit(X_val1)
# Visual sanity check: draw a few augmented batches of three images each,
# titling every image with its decoded class name.
tmps1 = datagen_train1.flow(X_train1, Y_train1, batch_size=3)
for i, (batch_x, batch_y) in enumerate(tmps1):
    for k, t in enumerate(batch_x):
        plt.subplot(1, 3, k + 1)
        plt.imshow(t)
        # BUG FIX: the original ran np.argmax over the WHOLE one-hot batch
        # (flattened), which always decoded the first image's class; decode
        # the label of each image individually instead.
        plt.title(label[np.argmax(batch_y[k])])
        plt.grid()
    if i == 10:
        break
plt.show()
X_train1[0].shape
I applied all the same preprocessing steps as before, and we can see the images are of size 224x224x3.
Now let's first test adding a batch normalization layer after every convolution layer of the ResNet we had before.
# Display the exported architecture diagram of the batch-norm ResNet.
fig = plt.figure(figsize=(60, 6))
img = mpimg.imread('resnet_batch1.png')
plt.imshow(img)
plt.show()
%%time
#https://www.kdnuggets.com/2018/09/dropout-convolutional-networks.html
# Batch-normalised residual CNN ("resnet_batch1"): two conv/pool stages, a
# 64-64-32 bottleneck whose output is added back to the split point
# (residual connection), then a Dense(256) -> softmax head. A
# BatchNormalization layer follows every convolution.
from tensorflow.keras.layers import Add, Input
from tensorflow.keras.layers import average, concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2
l2_lambda = 0.0001
img_wh = 224
input_holder = Input(shape=(img_wh, img_wh, 3))
# start with a conv layer
# NOTE(review): input_shape below is ignored in the functional API (the
# Input layer above fixes the shape) and states 1 channel vs the actual 3.
x = Conv2D(filters=32,
input_shape = (img_wh,img_wh,1),
kernel_size=(3,3),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
data_format="channels_last")(input_holder)
x = layers.BatchNormalization(axis=-1)(x)
x = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)
x = Conv2D(filters=32,
kernel_size=(3,3),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
data_format="channels_last")(x)
x = layers.BatchNormalization(axis=-1)(x)
# x_split is the point the residual connection branches from and re-joins.
x_split = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)
# NOTE(review): 'LeakyReLU' as an activation *string* relies on Keras
# resolving the advanced-activation layer class by name; confirm this works
# in the pinned Keras version (the canonical string is 'leaky_relu' in
# recent TF releases).
x = Conv2D(filters=64,
kernel_size=(1,1),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='LeakyReLU',
data_format="channels_last")(x_split)
x = layers.BatchNormalization(axis=-1)(x)
x = Conv2D(filters=64,
kernel_size=(3,3),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='LeakyReLU',
data_format="channels_last")(x)
x = layers.BatchNormalization(axis=-1)(x)
# 1x1 conv back down to 32 filters so the Add() below is shape-compatible
# with x_split.
x = Conv2D(filters=32,
kernel_size=(1,1),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='LeakyReLU',
data_format="channels_last")(x)
x = layers.BatchNormalization(axis=-1)(x)
# now add back in the split layer, x_split (residual added in)
x = Add()([x, x_split])
x = Activation("LeakyReLU")(x)
x = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)
x = Flatten()(x)
x = Dropout(0.25)(x)
x = Dense(256)(x)
x = Activation("relu")(x)
x = Dropout(0.5)(x)
x = Dense(NUM_CLASSES)(x)
x = Activation('softmax')(x)
resnet_batch1 = Model(inputs=input_holder,outputs=x)
resnet_batch1.summary()
resnet_batch1.compile(loss='categorical_crossentropy', # 'categorical_crossentropy' 'mean_squared_error'
optimizer='adam', # 'adadelta' 'rmsprop'
metrics=['accuracy'])
BS = 32
epochs = 50
# Model.fit accepts generators directly; fit_generator is deprecated in TF 2.x.
history = resnet_batch1.fit(
    datagen_train1.flow(X_train1, Y_train1, batch_size=BS),
    steps_per_epoch=len(X_train1) // BS,  # batches per epoch
    epochs=epochs, verbose=1,
    validation_data=datagen_val1.flow(X_val1, Y_val1, batch_size=BS),
    validation_steps=len(X_val1) // BS,
    callbacks=[EarlyStopping(monitor='val_loss', patience=5)],
)
plot_history(history)
# Test-set evaluation and confusion matrix.
yhat = np.argmax(resnet_batch1.predict(x_test1), axis=1)
acc = mt.accuracy_score(y_test1, yhat)
cm = mt.confusion_matrix(y_test1, yhat)
plot_confusion_matrix(cm, list(label.values()), title=f"CM Test Acc:{acc}")
Looking at the epoch-vs-accuracy graphs we can see the underfitting is significantly reduced; there are spikes in the validation set, but unlike before the validation learning curve is not always greater than the training learning curve. Furthermore, the training curve is continuously decreasing as well.
Up to this point I think this network style is the most promising given the learning curve, so I am choosing this network for hyperparameter optimization.
Here is the optimization performed using Optuna; the code used to generate these plots is at the bottom of the report.
There is no significant improvement in accuracy compared to our default model.
# Render the Optuna study figures exported earlier for the batch-norm ResNet:
# optimisation history + parallel-coordinate side by side, then the slice,
# contour and parameter-importance plots one per figure.
plt.figure(figsize=(18, 18))
plt.subplot(1, 2, 1)
img = mpimg.imread('resnet_batch.png')
plt.imshow(img)
plt.subplot(1, 2, 2)
img = mpimg.imread('plot_parallel_coordinate_resnet_batch.png')
plt.imshow(img)
plt.show()
for fname in ('plot_slice_resnet_batch.png',
              'plot_contour_resnet_batch.png',
              'plot_param_importances_resnet_batch.png'):
    plt.figure(figsize=(18, 18))
    img = mpimg.imread(fname)
    plt.imshow(img)
    plt.show()
I wanted to test what would happen if we not only add the convolution-block output to the starting layer but also subtract it from the starting layer, and then concatenate both results — so that not only the presence of features but also their absence is encoded.
# Architecture diagram for the add/subtract residual experiment.
fig = plt.figure(figsize=(60, 6))
img = mpimg.imread('resnet_addsub.png')
plt.imshow(img)
plt.show()
%%time
# Add/subtract residual CNN ("resnet_addsub"): after two conv/pool stages,
# two parallel 64-64-32 bottlenecks branch from x_split; one is ADDED to
# x_split, the other is SUBTRACTED from it, and the two results are
# concatenated before batch-norm and the dense head.
img_wh = 224
from tensorflow.keras.layers import Add, Input, Subtract
from tensorflow.keras.layers import average, concatenate
from tensorflow.keras.models import Model
input_holder = Input(shape=(img_wh, img_wh, 3))
l2_lambda = 0.0001
# NOTE(review): img_wh is assigned twice in this cell (here and above).
img_wh = 224
# start with a conv layer
# NOTE(review): input_shape below is ignored in the functional API and
# states 1 channel vs the actual 3.
x = Conv2D(filters=32,
input_shape = (img_wh,img_wh,1),
kernel_size=(3,3),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
data_format="channels_last")(input_holder)
x = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)
x = Conv2D(filters=32,
kernel_size=(3,3),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
data_format="channels_last")(x)
# x_split is shared by both the add branch and the subtract branch.
x_split = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)
# For add block
x = Conv2D(filters=64,
kernel_size=(1,1),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
data_format="channels_last")(x_split)
x = Conv2D(filters=64,
kernel_size=(3,3),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
data_format="channels_last")(x)
x = Conv2D(filters=32,
kernel_size=(1,1),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
data_format="channels_last")(x)
# For subtract block
x1 = Conv2D(filters=64,
kernel_size=(1,1),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
data_format="channels_last")(x_split)
x1 = Conv2D(filters=64,
kernel_size=(3,3),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
data_format="channels_last")(x1)
x1 = Conv2D(filters=32,
kernel_size=(1,1),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
data_format="channels_last")(x1)
# now add back in the split layer, x_split (residual added in)
x = Add()([x, x_split])
# Subtract computes x_split - x1 (argument order matters).
x1 = Subtract()([x_split,x1])
# Concatenate doubles the channel count (32 + 32 = 64).
x = concatenate([x,x1])
x = layers.BatchNormalization(axis=-1)(x)
x = Activation("relu")(x)
x = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)
x = Flatten()(x)
x = Dropout(0.25)(x)
x = Dense(256)(x)
x = Activation("relu")(x)
x = Dropout(0.5)(x)
x = Dense(NUM_CLASSES)(x)
x = Activation('softmax')(x)
resnet_addsub = Model(inputs=input_holder,outputs=x)
resnet_addsub.summary()
resnet_addsub.compile(loss='categorical_crossentropy', # 'categorical_crossentropy' 'mean_squared_error'
optimizer='adam', # 'adadelta' 'rmsprop'
metrics=['accuracy'])
BS = 64
epochs = 50
# Model.fit accepts generators directly; fit_generator is deprecated in TF 2.x.
history = resnet_addsub.fit(
    datagen_train1.flow(X_train1, Y_train1, batch_size=BS),
    steps_per_epoch=len(X_train1) // BS,  # batches per epoch
    epochs=epochs, verbose=1,
    validation_data=datagen_val1.flow(X_val1, Y_val1, batch_size=BS),
    validation_steps=len(X_val1) // BS,
    callbacks=[EarlyStopping(monitor='val_loss', patience=5)],
)
plot_history(history)
# Test-set evaluation and confusion matrix.
yhat = np.argmax(resnet_addsub.predict(x_test1), axis=1)
acc = mt.accuracy_score(y_test1, yhat)
cm = mt.confusion_matrix(y_test1, yhat)
plot_confusion_matrix(cm, list(label.values()), title=f"CM Test Acc:{acc}")
Surprisingly there is less of an underfitting problem, but the accuracy as a whole is not good. Let's train it for 50 epochs.
BS = 64
epochs = 50
# Continue training the same model for up to 50 more epochs. Note the early
# stopping here monitors the TRAINING loss ('loss'), so it only halts once
# training itself plateaus rather than when validation stops improving.
# Model.fit accepts generators directly; fit_generator is deprecated in TF 2.x.
history2 = resnet_addsub.fit(
    datagen_train1.flow(X_train1, Y_train1, batch_size=BS),
    steps_per_epoch=len(X_train1) // BS,  # batches per epoch
    epochs=epochs, verbose=1,
    validation_data=datagen_val1.flow(X_val1, Y_val1, batch_size=BS),
    validation_steps=len(X_val1) // BS,
    callbacks=[EarlyStopping(monitor='loss', patience=5)],
)
plot_history(history2)
# Test-set evaluation and confusion matrix.
yhat = np.argmax(resnet_addsub.predict(x_test1), axis=1)
acc = mt.accuracy_score(y_test1, yhat)
cm = mt.confusion_matrix(y_test1, yhat)
plot_confusion_matrix(cm, list(label.values()), title=f"CM Test Acc:{acc}")
Well, there is underfitting and the accuracy is not improving at all, so I will leave this as is.
Modified AlexNet so that the input of the last three convolution layers is added to their output, as seen here.
# Architecture diagram for the AlexNet + residual-connection hybrid.
fig = plt.figure(figsize=(60, 6))
img = mpimg.imread('alex_resnet.png')
plt.imshow(img)
plt.show()
%%time
# AlexNet-style CNN with a residual connection: the classic 96/256/384/384/256
# convolution stack, but the input of the last three convolutions (x_split)
# is added back to their output before the final pooling and dense head.
# The commented tf.keras.layers lines throughout are the original AlexNet
# reference this cell mirrors.
from tensorflow.keras.layers import Add, Input
from tensorflow.keras.layers import average, concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2
# NOTE(review): l2_lambda is not set in this cell; it carries over from an
# earlier cell (last assigned 0.0001).
img_wh = 224
input_holder = Input(shape=(img_wh, img_wh, 3))
# start with a conv layer
# tf.keras.layers.Conv2D(filters=96, kernel_size=11, strides=4,
# activation='relu',input_shape=(224,224,3)),
# NOTE(review): input_shape below is ignored in the functional API and
# states 1 channel vs the actual 3.
x = Conv2D(filters=96,
input_shape = (img_wh,img_wh,1),
kernel_size=(11,11),
strides=4,
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
data_format="channels_last")(input_holder)
#tf.keras.layers.MaxPool2D(pool_size=3, strides=2),
x = MaxPooling2D(pool_size=(3, 3),strides=2, data_format="channels_last")(x)
#tf.keras.layers.Conv2D(filters=256, kernel_size=5, padding='same',activation='relu')
x = Conv2D(filters=256,
kernel_size=(5,5),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
data_format="channels_last")(x)
#tf.keras.layers.MaxPool2D(pool_size=3, strides=2),
# x_split (256 channels) is where the residual branches from and re-joins.
x_split = MaxPooling2D(pool_size=(3, 3), strides=2, data_format="channels_last")(x)
# tf.keras.layers.Conv2D(filters=384, kernel_size=3, padding='same',activation='relu'),
# tf.keras.layers.Conv2D(filters=384, kernel_size=3, padding='same',activation='relu'),
# tf.keras.layers.Conv2D(filters=256, kernel_size=3, padding='same',activation='relu'),
x = Conv2D(filters=384,
kernel_size=(3,3),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
data_format="channels_last")(x_split)
x = Conv2D(filters=384,
kernel_size=(3,3),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
data_format="channels_last")(x)
# Final conv returns to 256 filters so Add() is shape-compatible with x_split.
x = Conv2D(filters=256,
kernel_size=(3,3),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
data_format="channels_last")(x)
# now add back in the split layer, x_split (residual added in)
x = Add()([x, x_split])
x = Activation("relu")(x)
#tf.keras.layers.MaxPool2D(pool_size=3, strides=2),
x = MaxPooling2D(pool_size=(3, 3),strides=2, data_format="channels_last")(x)
# tf.keras.layers.Flatten(),
# tf.keras.layers.Dense(4096, activation='relu'),
# tf.keras.layers.Dropout(0.5),
# tf.keras.layers.Dense(4096, activation='relu'),
# tf.keras.layers.Dropout(0.5),
# tf.keras.layers.Dense(num_classes)])
x = Flatten()(x)
x = Dropout(0.25)(x)
x = Dense(4096)(x)
x = Activation("relu")(x)
x = Dropout(0.5)(x)
x = Dense(4096)(x)
x = Activation("relu")(x)
x = Dropout(0.5)(x)
x = Dense(NUM_CLASSES)(x)
x = Activation('softmax')(x)
alex_resnet = Model(inputs=input_holder,outputs=x)
alex_resnet.summary()
alex_resnet.compile(loss='categorical_crossentropy', # 'categorical_crossentropy' 'mean_squared_error'
optimizer='adam', # 'adadelta' 'rmsprop'
metrics=['accuracy'])
BS = 128
epochs = 50
# BUG FIX: step counts previously used the 128x128 arrays (X_train / X_val)
# even though training runs on the 224x224 arrays; use X_train1 / X_val1 so
# the steps match the data actually flowing through the generators.
# Model.fit accepts generators directly; fit_generator is deprecated in TF 2.x.
history = alex_resnet.fit(
    datagen_train1.flow(X_train1, Y_train1, batch_size=BS),
    steps_per_epoch=len(X_train1) // BS,  # batches per epoch
    epochs=epochs, verbose=1,
    validation_data=datagen_val1.flow(X_val1, Y_val1, batch_size=BS),
    validation_steps=len(X_val1) // BS,
    callbacks=[EarlyStopping(monitor='val_loss', patience=5)],
)
plot_history(history)
# Confusion matrices on the test, train and validation splits.
yhat = np.argmax(alex_resnet.predict(x_test1), axis=1)
acc = mt.accuracy_score(y_test1, yhat)
cm = mt.confusion_matrix(y_test1, yhat)
plot_confusion_matrix(cm, list(label.values()), title=f"CM Test Acc:{acc:0.3f}")
yhat = np.argmax(alex_resnet.predict(X_train1), axis=1)
acc = mt.accuracy_score(np.argmax(Y_train1, axis=1), yhat)
cm = mt.confusion_matrix(np.argmax(Y_train1, axis=1), yhat)
plot_confusion_matrix(cm, list(label.values()), title=f"CM Train Acc:{acc:0.3f}")
yhat = np.argmax(alex_resnet.predict(X_val1), axis=1)
acc = mt.accuracy_score(np.argmax(Y_val1, axis=1), yhat)
cm = mt.confusion_matrix(np.argmax(Y_val1, axis=1), yhat)
plot_confusion_matrix(cm, list(label.values()), title=f"CM Val Acc:{acc:0.3f}")
This network is the best performing of all the CNNs I have tried. I don't see underfitting or overfitting, and the network is converging to a high accuracy, greater than ~90%.
https://stackoverflow.com/questions/51336761/how-to-do-transfer-learning-on-our-own-models
Showing transfer learning on the AlexNet + ResNet model above. First, save and load the model.
# Persist the trained AlexNet+ResNet, then reload it to verify serialisation.
alex_resnet.save('alex_resnet.h5')
# Recreate the exact same model, including its weights and the optimizer.
new_model_alex_resnet = tf.keras.models.load_model('alex_resnet.h5')
new_model_alex_resnet.summary()
# The reloaded model should reproduce the original test accuracy exactly.
predictions = new_model_alex_resnet.predict(x_test1)
yhat = np.argmax(predictions, axis=1)
acc = mt.accuracy_score(y_test1, yhat)
cm = mt.confusion_matrix(y_test1, yhat)
plot_confusion_matrix(cm, list(label.values()), title=f"CM Test Acc:{acc:0.3f}")
Testing the loaded model, we see it gives the same accuracy as the model we tested before. Now, freeze the weights of the convolution layers, add a dense layer with the node count reduced from 4096 to 1024, and enable learning in the bottom layers.
# Diagram of the transfer-learning head added on top of the frozen base.
fig = plt.figure(figsize=(60, 6))
img = mpimg.imread('new_model.png')
plt.imshow(img)
plt.show()
# Transfer-learning surgery on the reloaded AlexNet+ResNet: freeze the first
# `fine_tune_at` layers, then attach a fresh (smaller) dense head to the
# output of layer index `fine_tune_at`, discarding the original head.
print("Number of layers in the base model: ", len(new_model_alex_resnet.layers))
# Fine-tune from this layer onwards
# NOTE(review): index 11 is presumably the last conv/pool layer of this
# architecture -- confirm against the summary() printed above.
fine_tune_at = 11
# Freeze all the layers before the `fine_tune_at` layer
for layer in new_model_alex_resnet.layers[:fine_tune_at]:
layer.trainable = False
for layer in new_model_alex_resnet.layers[fine_tune_at:]:
layer.trainable = True
new_model_alex_resnet.summary()
# New head: Dropout -> Dense(1024) -> Dropout -> Dense(10) softmax, replacing
# the original pair of Dense(4096) layers.
add_x = new_model_alex_resnet.layers[fine_tune_at].output
add_x = Dropout(0.25)(add_x)
add_x = Dense(1024)(add_x)
add_x = Activation("relu")(add_x)
add_x = Dropout(0.5)(add_x)
add_x = Dense(NUM_CLASSES)(add_x)
add_x = Activation('softmax')(add_x)
new_model = Model(inputs=new_model_alex_resnet.input,outputs=add_x)
new_model.summary()
# https://www.tensorflow.org/tutorials/images/transfer_learning#compile_the_model_2
# Fine-tune with a 10x smaller learning rate so the pre-trained weights are
# not destroyed by large gradient updates.
new_model.compile(loss='categorical_crossentropy',
                  optimizer=tf.keras.optimizers.Adam(learning_rate=0.001 / 10),
                  metrics=['accuracy'])
BS = 128
epochs = 25
# BUG FIX: step counts previously used the 128x128 arrays (X_train / X_val)
# even though training runs on the 224x224 arrays; use X_train1 / X_val1.
# Model.fit accepts generators directly; fit_generator is deprecated in TF 2.x.
history1 = new_model.fit(
    datagen_train1.flow(X_train1, Y_train1, batch_size=BS),
    steps_per_epoch=len(X_train1) // BS,  # batches per epoch
    epochs=epochs, verbose=1,
    validation_data=datagen_val1.flow(X_val1, Y_val1, batch_size=BS),
    validation_steps=len(X_val1) // BS,
    callbacks=[EarlyStopping(monitor='val_loss', patience=5)],
)
plot_history(history1)
# Confusion matrices on the test, train and validation splits.
yhat = np.argmax(new_model.predict(x_test1), axis=1)
acc = mt.accuracy_score(y_test1, yhat)
cm = mt.confusion_matrix(y_test1, yhat)
plot_confusion_matrix(cm, list(label.values()), title=f"CM Test Acc:{acc:0.3f}")
yhat = np.argmax(new_model.predict(X_train1), axis=1)
acc = mt.accuracy_score(np.argmax(Y_train1, axis=1), yhat)
cm = mt.confusion_matrix(np.argmax(Y_train1, axis=1), yhat)
plot_confusion_matrix(cm, list(label.values()), title=f"CM Train Acc:{acc:0.3f}")
yhat = np.argmax(new_model.predict(X_val1), axis=1)
acc = mt.accuracy_score(np.argmax(Y_val1, axis=1), yhat)
cm = mt.confusion_matrix(np.argmax(Y_val1, axis=1), yhat)
plot_confusion_matrix(cm, list(label.values()), title=f"CM Val Acc:{acc:0.3f}")
Pretty nice results. Compared to the original network the result is very similar, with just a slight decrease (~2%).
The ResNet152V2 is too large to plot effectively here. I am testing this network by taking only the top half and adding one dense layer.
# Partial architecture diagram of the ResNet152V2 transfer-learning model.
fig = plt.figure(figsize=(60, 10))
img = mpimg.imread('model_ResNet.png')
plt.imshow(img)
plt.show()
from tensorflow.keras.applications import ResNet152V2
from tensorflow.keras.layers import GlobalAveragePooling2D
def get_ResNet152V2():
    """Build a ResNet152V2-based 10-class classifier for 224x224x3 inputs.

    The first 140 layers of the (topless) base are frozen; the remainder is
    fine-tuned. A GlobalAveragePooling + Dense(1000) head feeds the 10-way
    softmax output.
    """
    base_model = ResNet152V2(input_shape=(224, 224, 3), include_top=False)
    # FIX: the loop variable was named `layers`, shadowing the imported
    # tensorflow.keras `layers` module inside this function.
    for layer in base_model.layers[:140]:
        layer.trainable = False
    for layer in base_model.layers[140:]:
        layer.trainable = True
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1000, activation='relu')(x)
    pred = Dense(10, activation='softmax')(x)
    return Model(inputs=base_model.input, outputs=pred)
model = get_ResNet152V2()
model.summary()
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',  # alternatives: 'adadelta', 'adam'
              metrics=['accuracy'])
BS = 16
epochs = 10
# BUG FIX: step counts previously used the 128x128 arrays (X_train / X_val)
# even though training runs on the 224x224 arrays; use X_train1 / X_val1.
# Model.fit accepts generators directly; fit_generator is deprecated in TF 2.x.
history2 = model.fit(
    datagen_train1.flow(X_train1, Y_train1, batch_size=BS),
    steps_per_epoch=len(X_train1) // BS,  # batches per epoch
    epochs=epochs, verbose=1,
    validation_data=datagen_val1.flow(X_val1, Y_val1, batch_size=BS),
    validation_steps=len(X_val1) // BS,
    callbacks=[EarlyStopping(monitor='val_loss', patience=3)],
)
plot_history(history2)
# Confusion matrices on the test, train and validation splits.
yhat = np.argmax(model.predict(x_test1), axis=1)
acc = mt.accuracy_score(y_test1, yhat)
cm = mt.confusion_matrix(y_test1, yhat)
plot_confusion_matrix(cm, list(label.values()), title=f"CM Test Acc:{acc:0.3f}")
yhat = np.argmax(model.predict(X_train1), axis=1)
acc = mt.accuracy_score(np.argmax(Y_train1, axis=1), yhat)
cm = mt.confusion_matrix(np.argmax(Y_train1, axis=1), yhat)
plot_confusion_matrix(cm, list(label.values()), title=f"CM Train Acc:{acc:0.3f}")
yhat = np.argmax(model.predict(X_val1), axis=1)
acc = mt.accuracy_score(np.argmax(Y_val1, axis=1), yhat)
cm = mt.confusion_matrix(np.argmax(Y_val1, axis=1), yhat)
plot_confusion_matrix(cm, list(label.values()), title=f"CM Val Acc:{acc:0.3f}")
%%time
dnn = Sequential()
# add one layer on flattened output
# cnn.add(Dropout(0.25)) # add some dropout for regularization after conv layers
dnn.add(Flatten(input_shape = (img_wh,img_wh,3)))
# dnn.add(Dropout(0.25))
dnn.add(Dense(1024, activation='relu'))
# dnn.add(Dropout(0.25))
dnn.add(Dense(512, activation='relu'))
dnn.add(Dense(268, activation='relu'))
# dnn.add(Dropout(0.5))
dnn.add(Dense(128, activation='relu'))
# dnn.add(Dropout(0.5)) # add some dropout for regularization, again!
dnn.add(Dense(NUM_CLASSES, activation='softmax'))
# Let's train the model
dnn.compile(loss='categorical_crossentropy', # 'categorical_crossentropy' 'mean_squared_error'
optimizer='rmsprop', # 'adadelta' 'rmsprop'
metrics=['accuracy'])
dnn.summary()
BS = 32
epochs = 20
# NOTE(review): this cell trains on the original X_train / X_val arrays while
# the DNN above was built with input_shape=(img_wh, img_wh, 3) where img_wh
# was last set to 224 -- confirm the arrays match that size.
# Model.fit accepts generators directly; fit_generator is deprecated in TF 2.x.
historyd = dnn.fit(
    datagen_train.flow(X_train, Y_train, batch_size=BS),
    steps_per_epoch=len(X_train) // BS,  # batches per epoch
    epochs=epochs, verbose=1,
    validation_data=datagen_val.flow(X_val, Y_val, batch_size=BS),
    validation_steps=len(X_val) // BS,
    callbacks=[EarlyStopping(monitor='val_loss', patience=3)],
)
plot_history(historyd)
plt.show()
# Confusion matrices on the test, train and validation splits.
yhat = np.argmax(dnn.predict(x_test), axis=1)
acc = mt.accuracy_score(y_test, yhat)
cm = mt.confusion_matrix(y_test, yhat)
plot_confusion_matrix(cm, list(label.values()), title=f"CM Test Acc:{acc}")
plt.show()
yhat = np.argmax(dnn.predict(X_train), axis=1)
acc = mt.accuracy_score(np.argmax(Y_train, axis=1), yhat)
cm = mt.confusion_matrix(np.argmax(Y_train, axis=1), yhat)
plot_confusion_matrix(cm, list(label.values()), title=f"CM Train Acc:{acc:0.3f}")
plt.show()
yhat = np.argmax(dnn.predict(X_val), axis=1)
acc = mt.accuracy_score(np.argmax(Y_val, axis=1), yhat)
cm = mt.confusion_matrix(np.argmax(Y_val, axis=1), yhat)
plot_confusion_matrix(cm, list(label.values()), title=f"CM Val Acc:{acc:0.3f}")
plt.show()
Comparing the deep neural network with the AlexNet + ResNet at 15 epochs, the CNN is doing better than the DNN, showing that CNNs are better suited for image classification. Although, I should have looked at more epochs for the DNN.
In this Lab, I first tested the example CNNs from the class notebook. I also looked at changing the dropout rate in ResNet, then a couple of Xception-style networks were tested. After increasing the size of the images and including Batch Normalization, AlexNet+ResNet was tested, giving the best result. It can also be concluded that Batch Normalization helps with an underfitting network.
I also tested transfer learning using the top convolution layers from AlexNet+ResNet with a smaller dense layer at the bottom; this hurt the accuracy slightly. Lastly, I used transfer learning on the top part of ResNet152V2 with good results.
[1] https://www.kaggle.com/datasets/kaustubhb999/tomatoleaf
[2] 2012. Proceedings of the 25th International Conference on Neural Information Processing Systems - Volume 1. Curran Associates Inc., Red Hook, NY, USA. https://proceedings.neurips.cc/paper/2012/file/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf
Below is the code discussed in the CNN with Batch Normalization optimization section.
import optuna
from optuna.visualization import plot_contour
from optuna.visualization import plot_edf
from optuna.visualization import plot_intermediate_values
from optuna.visualization import plot_optimization_history
from optuna.visualization import plot_parallel_coordinate
from optuna.visualization import plot_param_importances
from optuna.visualization import plot_slice
import tensorflow as tf
import os
from tensorflow.keras.layers import Add, Input
from tensorflow.keras.layers import average, concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2
import cv2
from matplotlib import pyplot as plt
import numpy as np
import os
from sklearn import metrics as mt
import matplotlib.image as mpimg
from keras.models import Sequential
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation, Flatten, Dropout, Dense
from tensorflow.keras.layers import BatchNormalization
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
from sklearn.preprocessing import LabelBinarizer
# import seaborn as sns
from skimage.io import imshow
from sklearn.metrics import plot_confusion_matrix
from tensorflow.keras.utils import plot_model
from skimage.transform import resize
# import pandas as pd
import tensorflow.keras as keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Reshape, Input
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2
from tensorflow.keras.layers import average
from tensorflow.keras.models import Model
from PIL import Image
plt.style.use('ggplot')
# BUG FIX: tf.test.gpu_device_name() returns a device-name STRING such as
# '/device:GPU:0'; taking len() of it counted its characters (hence the
# earlier "Num of GPUs available: 13"). Count the actual GPU devices instead.
print("Num of GPUs available: ", len(tf.config.list_physical_devices('GPU')))
def img_to_np(DIR, flatten=True, newsize=(128, 128)):
    """Load the image at *DIR*, resize it to *newsize*, return a numpy array.

    Args:
        DIR: path to the image file.
        flatten: when True, return the pixel data as a 1-D array. The
            original implementation accepted this flag but silently ignored
            it; it is now honoured (all callers in this notebook pass
            flatten=False and are unaffected).
        newsize: (width, height) tuple passed to PIL's resize.

    Returns:
        np.ndarray of shape (H, W, C), or 1-D when flatten is True.
    """
    img = Image.open(DIR)
    img = img.resize(newsize)
    arr = np.asarray(img)
    return arr.reshape(-1) if flatten else arr
# Parameterised version of the batch-normalised residual CNN used for the
# Optuna search: same topology as resnet_batch1, but image size, Adam
# hyperparameters, L2 strength and dropout rate are arguments.
def build_net1(img_wh,learning_rate,beta_1,beta_2,epsilon,l2_lambda,dropout):
input_holder = Input(shape=(img_wh, img_wh, 3))
# start with a conv layer
# NOTE(review): input_shape below is ignored in the functional API (the
# Input layer above fixes the shape) and states 1 channel vs the actual 3.
x = Conv2D(filters=32,
input_shape = (img_wh,img_wh,1),
kernel_size=(3,3),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
data_format="channels_last")(input_holder)
x = layers.BatchNormalization(axis=-1)(x)
x = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)
x = Conv2D(filters=32,
kernel_size=(3,3),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='relu',
data_format="channels_last")(x)
x = layers.BatchNormalization(axis=-1)(x)
# x_split is where the residual connection branches and re-joins.
x_split = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)
# NOTE(review): 'LeakyReLU' as an activation *string* relies on Keras
# resolving the advanced-activation layer class by name; confirm this works
# in the pinned Keras version.
x = Conv2D(filters=64,
kernel_size=(1,1),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='LeakyReLU',
data_format="channels_last")(x_split)
x = layers.BatchNormalization(axis=-1)(x)
x = Conv2D(filters=64,
kernel_size=(3,3),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='LeakyReLU',
data_format="channels_last")(x)
x = layers.BatchNormalization(axis=-1)(x)
# 1x1 conv back down to 32 filters so Add() is shape-compatible with x_split.
x = Conv2D(filters=32,
kernel_size=(1,1),
kernel_initializer='he_uniform',
kernel_regularizer=l2(l2_lambda),
padding='same',
activation='LeakyReLU',
data_format="channels_last")(x)
x = layers.BatchNormalization(axis=-1)(x)
# now add back in the split layer, x_split (residual added in)
x = Add()([x, x_split])
x = Activation("LeakyReLU")(x)
x = MaxPooling2D(pool_size=(2, 2), data_format="channels_last")(x)
x = Flatten()(x)
x = Dropout(dropout)(x)
x = Dense(256)(x)
x = Activation("relu")(x)
x = Dropout(dropout)(x)
x = Dense(NUM_CLASSES)(x)
x = Activation('softmax')(x)
resnet_batch1 = Model(inputs=input_holder,outputs=x)
# Adam optimiser fully parameterised so Optuna can tune all four knobs.
resnet_batch1.compile(loss='categorical_crossentropy', # 'categorical_crossentropy' 'mean_squared_error'
optimizer=tf.keras.optimizers.Adam(
learning_rate=learning_rate,
beta_1=beta_1,
beta_2=beta_2,
epsilon=epsilon,
# amsgrad=False,
name="Adam",), # 'adadelta' 'rmsprop'
metrics=['accuracy'])
return resnet_batch1
# Build the index -> class-name mapping from the test directory layout, and
# its inverse (class name -> index).
TEST_DIR = "/users/sbataju/CS5324/tomato/val/"
label = dict(enumerate(os.listdir(TEST_DIR)))
index = len(label)
inv_label = {v: k for k, v in label.items()}
# Load the 224x224 test images (one subfolder per class under TEST_DIR).
NUM_CLASSES = 10
index = 0
data = []
for FOLDER in os.listdir(TEST_DIR):
    print(FOLDER, inv_label[FOLDER])
    # label[inv_label[FOLDER]] round-trips back to the folder name itself.
    class_name = label[inv_label[FOLDER]]
    for image_dir in os.listdir(TEST_DIR + FOLDER):
        data.append({
            "x": img_to_np(TEST_DIR + FOLDER + "/" + image_dir,
                           flatten=False, newsize=(224, 224)),
            "y": class_name,
        })
    index = index + 1
x = [sample["x"] for sample in data]
y = [sample["y"] for sample in data]
x_test1 = np.array(x)
# Map each class name back to its integer index for the label vector.
y_test1 = np.array([inv_label[i] for i in y], dtype=np.float16)
# Load the 224x224 training images, one-hot encode the labels, and carve out
# a stratified 10% validation split. This version correctly derives
# everything from the *1 arrays (y_train1 / y_test1).
TRAIN_DIR = "/users/sbataju/CS5324/tomato/train/"
index=0
data=[]
for FOLDER in os.listdir(TRAIN_DIR):
print(FOLDER,' ',inv_label[FOLDER])
for image_dir in os.listdir(TRAIN_DIR+FOLDER):
# label[inv_label[FOLDER]] round-trips back to the folder name itself.
data.append({"x":img_to_np(TRAIN_DIR+FOLDER+"/"+image_dir,flatten=False,newsize=(224,224)),"y":label[inv_label[FOLDER]]})
index=index+1
x,y=[],[]
for obj in data:
x.append(obj["x"])
y.append(obj["y"])
x_train1 = np.array(x)
# Class names -> integer ids, stored as float16.
y_train1 = np.array([inv_label[i] for i in y],dtype=np.float16)
y_train_ohe1 = keras.utils.to_categorical(y_train1, NUM_CLASSES)
y_test_ohe1 = keras.utils.to_categorical(y_test1, NUM_CLASSES)
from sklearn.model_selection import train_test_split
# Stratified split keeps class proportions equal in train and validation.
X_train1, X_val1, Y_train1, Y_val1 = train_test_split(
x_train1, y_train_ohe1, test_size=0.1, random_state=42,stratify=y_train1)
# Scale pixel intensities from [0, 255] to [0, 1].
X_train1,X_val1 =X_train1/255.0 , X_val1/255.0
x_test1 = x_test1/255.0
# Augmentation pipeline for the 224x224 training set: random rotations up to
# 180 degrees, 20% shifts, mild shear and both flips. Featurewise /
# samplewise normalisation is disabled (pixels are pre-scaled to [0, 1]).
datagen_train1 = ImageDataGenerator(
featurewise_center=False,
samplewise_center=False,
featurewise_std_normalization=False,
samplewise_std_normalization=False,
zca_whitening=False,
rotation_range=180, # used, Int. Degree range for random rotations.
width_shift_range=0.2, # used, Float (fraction of total width). Range for random horizontal shifts.
height_shift_range=0.2, # used, Float (fraction of total height). Range for random vertical shifts.
shear_range=0.1, # Float. Shear Intensity (Shear angle in counter-clockwise direction as radians)
zoom_range=0.0,
channel_shift_range=0.,
fill_mode='nearest',
cval=0.,
# brightness_range=[0.2,1],
horizontal_flip=True,
vertical_flip=True,
rescale=None)
# Validation generator. BUG FIX: the validation data was previously run
# through the same heavy augmentation (180-degree rotations, shifts, flips)
# as the training data, which distorts the validation metrics Optuna relies
# on. Validation batches are now served unchanged.
datagen_val1 = ImageDataGenerator()
# fit() only computes statistics when featurewise options are enabled; both
# generators have them disabled, so these calls are harmless no-ops kept for
# parity with the original workflow.
datagen_train1.fit(X_train1)
datagen_val1.fit(X_val1)
# %%time
# Persist Optuna results: one JSON of Keras history per trial, stored under
# ./resnet_batch/, with the study itself in a local SQLite database.
study_name = "resnet_batch"  # Unique identifier of the study.
CV_RESULT_DIR = os.path.join(os.getcwd(), study_name) + "/"
# makedirs(exist_ok=True) is race-free, unlike the exists()/mkdir() pair.
os.makedirs(CV_RESULT_DIR, exist_ok=True)
storage_name = "sqlite:///{}.db".format(study_name)
import json
def objective(trial):
    """Optuna objective: train one ResNet-style model and score the trial.

    Samples Adam hyperparameters (learning rate, beta_1/beta_2, epsilon),
    L2 weight decay and dropout, builds the network via ``build_net1``,
    trains it for up to 25 epochs on the augmented generators with early
    stopping on val_loss, dumps the Keras history to
    ``CV_RESULT_DIR/<trial>.json`` and returns the final accuracy.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample hyperparameters.

    Returns
    -------
    float
        Last validation accuracy (falls back to training accuracy if the
        model was compiled without an 'accuracy' metric name).
    """
    param = {
        "learning_rate": trial.suggest_float("learning_rate", 1e-8, 1, log=True),
        "beta_1": trial.suggest_float("beta_1", 1e-8, 1, log=True),
        "beta_2": trial.suggest_float("beta_2", 1e-8, 1, log=True),
        "epsilon": trial.suggest_float("epsilon", 1e-8, 1, log=True),
        "l2_lambda": trial.suggest_float("l2_lambda", 1e-8, 1, log=True),
        "dropout": trial.suggest_float("dropout", 0.1, 0.99),
        "img_wh": 224,
    }
    resnet_batch2 = build_net1(**param)
    BS = 128
    epochs = 25
    # Model.fit accepts generators directly; fit_generator is deprecated and
    # removed in recent tf.keras releases.
    history = resnet_batch2.fit(
        datagen_train1.flow(X_train1, Y_train1, batch_size=BS),
        steps_per_epoch=len(X_train1) // BS,  # batches consumed per epoch
        epochs=epochs,
        verbose=1,
        validation_data=datagen_val1.flow(X_val1, Y_val1, batch_size=BS),
        validation_steps=len(X_val1) // BS,
        callbacks=[EarlyStopping(monitor='val_loss', patience=5)],
    )
    print(history.history)
    # Persist the full history so each trial's learning curves can be
    # re-plotted without retraining.
    with open(CV_RESULT_DIR + f"{trial.number}.json", "w") as outfile:
        json.dump(history.history, outfile)
    # BUG FIX: the study previously maximised the final *training* accuracy,
    # which rewards overfitting configurations.  Optimise validation accuracy
    # instead; keep training accuracy as a fallback in case the metric key
    # differs (assumes the model was compiled with metrics=['accuracy'] —
    # TODO confirm against build_net1).
    val_acc = history.history.get('val_accuracy')
    score = val_acc[-1] if val_acc else history.history['accuracy'][-1]
    print(score)
    return score
# pruner = optuna.pruners.MedianPruner(n_warmup_steps=5)
# pruner = optuna.pruners.HyperbandPruner()
# Create the study, or resume it if it already exists in the sqlite storage.
# BUG FIX: without load_if_exists=True, re-running this script raises
# optuna.exceptions.DuplicatedStudyError because the study name is already
# recorded in resnet_batch.db.
study = optuna.create_study(
    direction="maximize",
    storage=storage_name,
    study_name=study_name,
    load_if_exists=True,
)
study.optimize(objective, n_trials=50)

# Report the best hyperparameter configuration found.
print("Best trial:")
trial = study.best_trial
print("  Value: {}".format(trial.value))
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))
Plot the hyperparameter optimization results
from optuna.visualization import plot_contour
from optuna.visualization import plot_edf
from optuna.visualization import plot_intermediate_values
from optuna.visualization import plot_optimization_history
from optuna.visualization import plot_parallel_coordinate
from optuna.visualization import plot_param_importances
from optuna.visualization import plot_slice
# Reload the persisted study from sqlite and export each diagnostic figure
# as a PNG (optimization history, per-parameter slices, parallel
# coordinates, and parameter importances).
loaded_study = optuna.load_study(study_name="resnet_batch", storage="sqlite:///resnet_batch.db")
figure_exports = [
    (plot_optimization_history, "resnet_batch.png"),
    (plot_slice, "plot_slice_resnet_batch.png"),
    (plot_parallel_coordinate, "plot_parallel_coordinate_resnet_batch.png"),
    (plot_param_importances, "plot_param_importances_resnet_batch.png"),
]
for make_figure, png_name in figure_exports:
    make_figure(loaded_study).write_image(png_name)